# coding:utf-8

import re

# Join scraped text fragments into a single cleaned content string
def deal_content(content_list):
    """Merge scraped text fragments into one cleaned string.

    The fragments are joined with ``<p>`` as the separator. Any ``<p>``
    that is immediately followed by a CRLF is then removed together with
    that CRLF, every tab character is removed, and surrounding whitespace
    is stripped from the final result.

    Args:
        content_list: Iterable of text fragments.

    Returns:
        The joined and cleaned text.
    """
    merged = '<p>'.join(content_list)
    # Order matters: drop the "<p>" + CRLF pairs before removing tabs.
    for unwanted in ('<p>\r\n', '\t'):
        merged = merged.replace(unwanted, '')
    return merged.strip()


# Rewrite a Chinese-style date (年/月/日) into dash-separated form
def deal_time(spider_time):
    """Rewrite a Chinese-style date string into dash-separated form.

    ``年`` and ``月`` are replaced by ``-``, ``日`` is removed, and
    surrounding whitespace is stripped; everything else is left as is.

    Args:
        spider_time: Text (or UTF-8 encoded bytes) such as
            ``u"2019年05月06日"``.

    Returns:
        The rewritten value as a native ``str`` (UTF-8 encoded byte string
        on Python 2, text on Python 3), e.g. ``"2019-05-06"``.
    """
    # Do the replacement on text: mixing encoded bytes with text literals
    # fails on Python 3, and encoding a non-ASCII byte string fails on
    # Python 2. Byte input is assumed to be UTF-8.
    if isinstance(spider_time, bytes):
        spider_time = spider_time.decode('utf-8')
    cleaned = (spider_time.replace(u'年', u'-')
               .replace(u'月', u'-')
               .replace(u'日', u''))
    # On Python 2 the native str type is bytes, so hand back UTF-8 there to
    # keep the historical return type.
    if str is bytes:
        cleaned = cleaned.encode('utf-8')
    return cleaned.strip()

# Extract a "date time" string from a scraped timestamp (time defaults to 00:00:00)
def deal_date(spider_time):
    """Extract a ``"<date> <time>"`` string from a scraped timestamp.

    The Chinese markers 年/月/日 are first rewritten to dash-separated form,
    then the first ``N-N-N`` date and the first ``N:N[:N]`` time found in
    the text are used.

    Args:
        spider_time: Text (or UTF-8 encoded bytes) containing the
            timestamp, e.g. ``u"2019年05月06日 10:30"``.

    Returns:
        A native ``str`` of the form ``"<date> <time>"``. The time is
        ``"00:00:00"`` when none is found, and ``":00"`` is appended when
        the time is exactly ``HH:MM``. When no date is found the date part
        is empty, so the result starts with a space.
    """
    # Work on text so the same code runs on Python 2 and 3; byte input is
    # assumed to be UTF-8.
    if isinstance(spider_time, bytes):
        spider_time = spider_time.decode('utf-8')
    date_str = (spider_time.replace(u'年', u'-')
                .replace(u'月', u'-')
                .replace(u'日', u''))

    # Explicit [0-9] keeps matching ASCII-only on both Python versions.
    # Only the first hit is used, so text containing several dates or times
    # does not get them glued together.
    date_match = re.search(r"[0-9]+-[0-9]+-[0-9]+", date_str)
    time_match = re.search(r"[0-9]+:[0-9]+[:0-9]*", date_str)

    news_date = date_match.group(0) if date_match else ""
    news_time = "00:00:00"
    if time_match:
        news_time = time_match.group(0)
        # A bare HH:MM gets zero seconds; any other shape is kept verbatim.
        if re.match(r"^[0-9][0-9]:[0-9][0-9]$", news_time):
            news_time += ":00"

    # The result holds only ASCII digits, '-', ':' and a space, so str()
    # safely yields the native string type on both Python 2 and 3.
    return str(news_date + " " + news_time)


# Zero-pad the month/day/hour/minute/second fields of a "Y-M-D H:M:S" timestamp
def deal_rm_time(item_time):
    """Zero-pad the fields of a ``Y-M-D H:M:S`` timestamp to two digits.

    The first ``Y-M-D H:M:S`` occurrence in ``item_time`` (date and time
    separated by a single space) has its month, day, hour, minute and
    second padded to two digits. The year and any text around the
    timestamp (for example a trailing newline) are left untouched.

    Args:
        item_time: Timestamp text such as ``"2019-5-6 9:3:5"``.

    Returns:
        The text with the timestamp padded, e.g. ``"2019-05-06 09:03:05"``.
        Input that contains no such timestamp is returned unchanged.
    """
    def _pad(match):
        # Rebuild the timestamp with every field except the year padded.
        year, month, day, hour, minute, second = match.groups()
        return "%s-%s-%s %s:%s:%s" % (
            year, month.zfill(2), day.zfill(2),
            hour.zfill(2), minute.zfill(2), second.zfill(2),
        )

    # Padding by field rather than by character position means values that
    # are already two digits wide are never padded a second time, and short
    # or malformed input cannot trigger an IndexError.
    return re.sub(
        r"([0-9]+)-([0-9]{1,2})-([0-9]{1,2}) "
        r"([0-9]{1,2}):([0-9]{1,2}):([0-9]{1,2})",
        _pad,
        item_time,
        count=1,
    )


# Prefix each scraped image path with the base href
def deal_img(href, spider_img):
    """Build the list of image addresses for a page.

    Each entry of ``spider_img`` is appended to ``href`` by plain string
    concatenation, the result is UTF-8 encoded, and surrounding whitespace
    is stripped. Whitespace that ends up between ``href`` and the entry is
    not removed, because stripping happens after concatenation.

    Args:
        href: Prefix placed in front of every image path.
        spider_img: Iterable of scraped image paths.

    Returns:
        A list with one UTF-8 encoded value per input path, in input order.
    """
    return [(href + path).encode('utf-8').strip() for path in spider_img]

